{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Collect synthesis conditions"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "from llm_miner import JournalReader\n",
    "from pathlib import Path\n",
    "from tqdm import tqdm\n",
    "import pandas as pd\n",
    "from collections import Counter"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Root directory of the JournalReader JSON outputs (one sub-directory level deep).\n",
    "JR_path = '../11_final_output_re_re'\n",
    "# Path.glob gives no ordering guarantee; sort so that `files`, and every list derived\n",
    "# from it that is later indexed by position, is the same on every run.\n",
    "files = sorted(Path(JR_path).glob('*/*.json'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_syn_condition(file, total_data):\n",
    "    \"\"\"Append the synthesis records found in one JournalReader JSON file to ``total_data``.\n",
    "\n",
    "    One record is appended for every entry of ``jr.result`` that has a non-empty\n",
    "    'processes' list in which at least one step uses a listed synthesis method and\n",
    "    carries a 'precursor' field. A record holds the keys 'year', 'processes',\n",
    "    'meta', 'doi' and 'precursor' (the precursor names gathered from those steps).\n",
    "\n",
    "    Args:\n",
    "        file: path handed to ``JournalReader.from_json``.\n",
    "        total_data: list that is extended in place.\n",
    "\n",
    "    Returns:\n",
    "        ``total_data``, or ``None`` when the file cannot be loaded.\n",
    "    \"\"\"\n",
    "    synthesis_methods = ('chemical synthesis', 'solvothermal synthesis', 'sonochemical synthesis', 'hydrothermal synthesis')\n",
    "\n",
    "    try:\n",
    "        jr = JournalReader.from_json(file)\n",
    "    except Exception:\n",
    "        # Best effort: a file that cannot be loaded is skipped.\n",
    "        return\n",
    "\n",
    "    try:\n",
    "        # The year is the text before the first '.' of the date string.\n",
    "        year = int(jr.metadata.date.split('.')[0])\n",
    "    except Exception:\n",
    "        # Missing or malformed date. Narrowed from a bare `except` so that\n",
    "        # KeyboardInterrupt / SystemExit are no longer swallowed.\n",
    "        year = None\n",
    "\n",
    "    for result in jr.result:\n",
    "        if 'processes' not in result:\n",
    "            continue\n",
    "\n",
    "        process = result['processes']\n",
    "        if not process:\n",
    "            continue\n",
    "\n",
    "        precursors = []\n",
    "        synthesis = False\n",
    "        for operation in process:\n",
    "            if operation['synthesis method'] not in synthesis_methods:\n",
    "                continue\n",
    "            if 'precursor' not in operation:\n",
    "                continue\n",
    "            precursors.extend(pre['name'] for pre in operation['precursor'])\n",
    "            synthesis = True\n",
    "\n",
    "        # The record is only built once it is known to be kept.\n",
    "        if synthesis:\n",
    "            total_data.append({\n",
    "                'year': year,\n",
    "                'processes': process,\n",
    "                'meta': result.material.to_dict(),\n",
    "                'doi': jr.doi,\n",
    "                'precursor': precursors,\n",
    "            })\n",
    "\n",
    "    return total_data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Gather records from every JSON file; get_syn_condition appends to total_data in place,\n",
    "# so its return value is not needed here.\n",
    "total_data = []\n",
    "for file in files:\n",
    "    get_syn_condition(file, total_data=total_data)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Keep only the records whose material metadata carries a string 'refcode'.\n",
    "has_refcode_data = list(\n",
    "    filter(lambda record: isinstance(record['meta']['refcode'], str), total_data)\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Precursor list"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 66,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Tally how often each precursor name occurs across the refcode-bearing records.\n",
    "counter = Counter()\n",
    "for data in has_refcode_data:\n",
    "    precursor = data['precursor']\n",
    "    counter.update(precursor)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 73,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Dump the (precursor name, count) pairs, most frequent first; the two columns are unnamed (0 and 1).\n",
    "# NOTE(review): later cells read 'assets/precursor.xlsx' with 'Name' and 'Check (M/N/X/O)' columns,\n",
    "# which this CSV does not contain - presumably that file is annotated by hand from this dump; confirm.\n",
    "df = pd.DataFrame(counter.most_common())\n",
    "df.to_csv('precursor.csv', encoding='utf-8-sig')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Load precursor_list"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [],
   "source": [
    "from matplotlib import pyplot as plt\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Precursor table; the lookup cell below reads its 'Name' and 'Check (M/N/X/O)' columns.\n",
    "# Note that this rebinds `df`, which earlier held the precursor counts.\n",
    "df = pd.read_excel('assets/precursor.xlsx')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Solvent table with NaN cells replaced by None; the lookup cell below reads its\n",
    "# 'name' and 'standard_name' columns.\n",
    "df_solvent = pd.read_excel('assets/solvent.xlsx').replace({np.nan: None})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Build both lookup tables column-wise. Iterating `df.iloc()` only works through Python's\n",
    "# legacy __getitem__ iteration fallback and materialises one Series per row.\n",
    "# name_dict: precursor name -> check tag; solvent_dict: solvent name -> standard name.\n",
    "name_dict = dict(zip(df['Name'], df['Check (M/N/X/O)']))\n",
    "solvent_dict = dict(zip(df_solvent['name'], df_solvent['standard_name']))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Step names accepted by the filter cell below: a record is dropped when any of its\n",
    "# 'synthesis method' values is not in this list.\n",
    "# NOTE(review): get_syn_condition accepts 'hydrothermal synthesis', but it is not listed here,\n",
    "# so records containing such a step are filtered out - confirm this is intended.\n",
    "operations = ['ball milling', 'centrifugation', 'chemical mechanical polishing', 'chemical synthesis', 'chemical vapor deposition', 'drying', 'electrochemical deposition', 'heat treatment', 'microwave-assisted synthesis', 'mixing', 'rinsing', 'solvothermal synthesis', 'sol-gel synthesis', 'sonication', 'sonochemical synthesis', 'thermal evaporation', 'wet etching', 'washing', 'cooling', 'pH adjustment', 'filtration']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "9197\n"
     ]
    }
   ],
   "source": [
    "# A record is kept only if every precursor is tagged 'M' or 'N' in name_dict\n",
    "# (unknown names count as 'X') and every step name appears in `operations`.\n",
    "allowed_tags = ('M', 'N')\n",
    "sorted_output = []\n",
    "for data in has_refcode_data:\n",
    "    # Lists rather than generators: every item is evaluated, as in a plain loop.\n",
    "    precursor_flags = [name_dict.get(pre, 'X') in allowed_tags for pre in data['precursor']]\n",
    "    method_flags = [process['synthesis method'] in operations for process in data['processes']]\n",
    "\n",
    "    if all(precursor_flags) and all(method_flags):\n",
    "        sorted_output.append(data)\n",
    "\n",
    "print (len(sorted_output))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'year': 2019,\n",
       " 'processes': [{'synthesis method': 'solvothermal synthesis',\n",
       "   'precursor': [{'name': 'Zn(NO3)2⋅6H2O', 'amount': '45', 'unit': 'mg'},\n",
       "    {'name': 'H4abtc', 'amount': '0.0358', 'unit': 'g'}],\n",
       "   'solvent': [{'name': 'DMA', 'amount': '5.0', 'unit': 'mL'}],\n",
       "   'reducing agent': [{'name': '', 'amount': '', 'unit': ''}],\n",
       "   'surfactant': [{'name': '', 'amount': '', 'unit': ''}],\n",
       "   'pressure': '',\n",
       "   'temperature': '120°C',\n",
       "   'time': '72h',\n",
       "   'heating rate': '',\n",
       "   'cooling rate': '48h'},\n",
       "  {'synthesis method': 'filtration',\n",
       "   'time': '',\n",
       "   'atmosphere': '',\n",
       "   'pressure': ''},\n",
       "  {'synthesis method': 'washing', 'washing solution': 'DMA', 'amount': ''},\n",
       "  {'synthesis method': 'drying',\n",
       "   'pressure': '',\n",
       "   'temperature': '',\n",
       "   'atmosphere': '',\n",
       "   'time': ''}],\n",
       " 'meta': {'name': '{[Zn2(abtc)(H2O)3](DMA)}n',\n",
       "  'symbol': '1',\n",
       "  'chemical_formula': 'C16 H6 N2 O11 Zn2',\n",
       "  'formula_source': 'csd',\n",
       "  'synonyms': [],\n",
       "  'refcode': 'JOPKAD',\n",
       "  'is_general': True},\n",
       " 'doi': '10.1007/s10876-019-01615-7',\n",
       " 'precursor': ['Zn(NO3)2⋅6H2O', 'H4abtc']}"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Show the first retained record as a sample of the record layout.\n",
    "sorted_output[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Count the retained records per year; records whose year is not an int are skipped.\n",
    "year_counter = Counter()\n",
    "for data in sorted_output:\n",
    "    if not isinstance(data['year'], int):\n",
    "        continue\n",
    "    year_counter[data['year']] += 1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [],
   "source": [
    "# most_common() yields (year, count) pairs ordered by count; zip(*...) transposes them\n",
    "# into two parallel tuples. Unpacking raises ValueError when year_counter is empty.\n",
    "years, counts = zip(*year_counter.most_common())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Text(0.5, 0, 'Year')"
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAjwAAAG0CAYAAAA2BP2yAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAAA5tUlEQVR4nO3df3zP9f7/8fu7mW3vMbExo43kNyEaw5KU5EenxKHkOFPJr5STCqmkYr6nSCeF1OFEUfpxKofSoY4fWY5R+THEZMNWvWesmb2xvb5/+Hhf2tmwvd6vt7332u16ubwv9X49X8/3Hq+n58XuXu/n6/VyGIZhCAAAwMauKO8CAAAAfI3AAwAAbI/AAwAAbI/AAwAAbI/AAwAAbI/AAwAAbI/AAwAAbK9KeRfgLwoLC3X06FFVr15dDoejvMsBAAClYBiGfvvtN9WrV09XXHHh8zgEnv9z9OhRRUdHl3cZAADAhPT0dF111VUXbCfw/J/q1atLOjdgYWFh5VwNAAAojZycHEVHR3t+j18Igef/nP8aKywsjMADAEAFc6nlKCxaBgAAtkfgAQAAtkfgAQAAtkfgAQAAtkfgAQAAtkfgAQAAtkfgAQAAtkfgAQAAtkfgAQAAtkfgAQAAtkfgAQAAtkfgAQAAtkfgAQAAtkfgAQAAtlelvAsAAHgvLS1NLperzP0iIiIUExPjg4oA/0LgAYAKLi0tTc2at1D+qbwy9w0OcWrvnhRCD2yPwAMAFZzL5VL+qTyF95ugwPDoUvc7k5WurJWz5HK5CDywPQIPANhEYHi0guo2Lu8yAL/EomUAAGB7BB4AAGB7BB4AAGB7BB4AAGB7BB4AAGB7BB4AAGB7BB4AAGB7BB4AAGB7BB4AAGB7BB4AAGB7BB4AAGB7BB4AAGB7BB4AAGB7BB4AAGB7BB4AAGB7BB4AAGB7BB4AAGB7BB4AAGB7Vcq7AABA+UpJSSlzn4iICMXExPigGsA3CDwAUEkV5GZLDoeGDh1a5r7BIU7t3ZNC6EGF4VeBJz09XaNHj9b69etVq1YtjR8/XuPHj5ckbd++XaNGjdKOHTvUqlUrzZ8/Xx06dPD0XbZsmZ566illZGSoV69eWrhwoSIiIsrpSADA/xW6cyXDUHi/CQoMjy51vzNZ6cpaOUsul4vAgwrDrwLPoEGD1KBBAyUnJ2v37t0aMmSIGjRooFtvvVV9+vTRvffeq8WLF2v+/Pnq27evDhw4oNDQUG3ZskX333+/5s+fr3bt2unhhx9WQkKCVq5cWd6HBAB+LzA8WkF1G5d3GYBP+U3gyc7OVlJSkhYuXKgmTZqoSZMmuu2227R27VplZ2crJCREL774ohwOh+bMmaNVq1ZpxYoVSkhI0Ny5czVo0CANGzZMkrRkyRI1aNBABw8e1NVXX13ORwYAOC8tLU0ul6vM/VgzBG/5TeAJCQmR0+nUokWLNHPmTKWmpmrTpk2aPn26kpKSFB8fL4fDIUlyOBzq2rWrNm/erISEBCUlJWnSpEmez4qOjlZMTIySkpIIPADgJ9LS0tSseQvln8orc1/WDMFbfhN4goOD9dprr+mhhx7SK6+8ooKCAiUkJOj+++/XP//5T7Vq1arI/pGRkdq5c6ckKSMjQ/Xq1SvWfvjw4Qv+PLfbLbfb7Xmfk5Nj4dEAAP6Xy+VS/qk81gyhXPhN4JHOXRp5++23a8KECdq5c6fGjRunW265RXl5eQoKCiqyb1BQkCewXKq9JImJiZo2bZr1BwEAuCjWDKE8+E3gWbt2rd58800dPnxYISEhuv7663XkyBG98MILatSoUbHw4na75XQ6JZ07O3Sx9pJMnjxZjz76qOd9Tk6OoqNL/y8OAABQcfjNnZaTk5PVpEkThYSEeLZdd911OnTokOrXr6/MzMwi+2dmZioqKkqSLtlekqCgIIWFhRV5AQAAe/KbwFOvXj3t379fp0+f9mzbs2ePrr76asXFxemb
b76RYRiSJMMwtGnTJsXFxUmS4uLitHHjRk+/9PR0paene9oBAEDl5jeB5/bbb1dgYKAeeOAB7du3T5999plmzJihhx9+WAMHDtTx48c1fvx47d69W+PHj9fJkyc1aNAgSdLo0aO1ZMkSvfXWW/rhhx80bNgw9evXjyu0AACAJD8KPDVq1NDatWuVkZGh2NhY/eUvf9FTTz2lBx98UGFhYVq5cqU2bNigDh06KCkpSatWrVJoaKgkqXPnzlqwYIGmTZumLl26qGbNmlq0aFE5HxEAAPAXfrNoWZJatmypL7/8ssS2jh07atu2bRfsm5CQoISEBB9VBgAAKjK/OcMDAADgKwQeAABgewQeAABge361hgcA7IKHZAL+hcADABbjIZmA/yHwAIDFeEgm4H8IPADgIzwkE/AfLFoGAAC2R+ABAAC2R+ABAAC2R+ABAAC2R+ABAAC2R+ABAAC2x2XpAABTUlJSfLo/YCUCDwCgTApysyWHQ0OHDi17Z4fjXH/gMiPwAADKpNCdKxmG6TtJF7pzfVgdUDICDwDAFO4kjYqERcsAAMD2CDwAAMD2CDwAAMD2CDwAAMD2CDwAAMD2CDwAAMD2CDwAAMD2CDwAAMD2CDwAAMD2CDwAAMD2CDwAAMD2CDwAAMD2CDwAAMD2CDwAAMD2CDwAAMD2CDwAAMD2CDwAAMD2CDwAAMD2CDwAAMD2CDwAAMD2CDwAAMD2CDwAAMD2CDwAAMD2CDwAAMD2CDwAAMD2qpR3AQAAlEZKSkqZ+0RERCgmJsYH1aCiIfAAAPxaQW625HBo6NChZe4bHOLU3j0phB4QeAAA/q3QnSsZhsL7TVBgeHSp+53JSlfWyllyuVwEHhB4AAAVQ2B4tILqNi7vMlBBsWgZAADYHoEHAADYHoEHAADYHoEHAADYnulFy4Zh6KefftLVV18tSdq3b58WLlyoKlWqaPjw4WratKllRQIAAHjDVOA5fPiwevXqpaCgIG3btk0///yzOnXqpBMnTkiS5s6dq/Xr1+u6666ztFgAAAAzTH2l9eSTTyo9PV2jR4+WJC1cuFAnTpzQ+++/r4MHDyo6OlpTp061tFAAAACzTAWeNWvWaPz48RoxYoQk6dNPP1V0dLQGDhyoBg0aaMSIEdq4caOlhQIAAJhl6iutEydOeNbu/PLLL0pOTtYDDzzgaQ8NDdXZs2etqRAAKpmyPjPKzDOmgMrGVOBp0KCBduzYIUlavny5JOn222/3tH/++eeeQAQAKB1vnhklh+NcfwAlMhV4hgwZoueee0779+/XunXrFBMTo9tuu00HDhzQX/7yF/3rX//S7Nmzra4VAGzN22dGFbpzfVgdULGZCjzPPPOMAgIC9O6776pr16566aWXVKVKFeXk5Gj9+vV66qmn9Mgjj1hdKwBUCjwzCrCe6fvwTJkyRVOmTCmyrW3btvr1118VGBjodWEAAABW8epp6S6XS19++aXS0tI0ePBghYaGyuVyqUWLFlbVBwAA4DXTj5aYNWuWYmJidO+99+rJJ59UamqqvvnmG7Vu3Vpjx46VYRhW1gkAAGCaqcDz7rvv6vHHH1f//v21YsUKT7hp3769+vfvr/nz5+vVV1+1tFAAAACzTAWel156ST179tQ777yj7t27e7ZHR0frgw8+UJ8+fbRw4UKragQAAPCKqcCTkpKiO+6444Ltt99+u1JTU00XBQAAYCVTgad69eo6fvz4BdsPHTqkatWqma0JAADAUqYCz2233abXX39dv/zyS7G2H374Qa+99pp69uzpdXEAAABWMBV4EhMTZRiGWrVqpQcffFAOh0MLFizQXXfdpdjYWAUGBuq5556zulYAAABTTAWe+vXra+vWrerbt6/WrVsnwzC0YsUKrVmzRnfccYe+/fZbNWrUyOpaAQAATDF148E1a9YoLi5OixcvlmEYcrlcKigoUO3atRUQEGB1jQAAAF4xdYbnnnvuUWJioiTJ4XCodu3aqlu3rtdh
x+12a+zYsapZs6YiIyP15JNPeu7xs337dnXq1ElOp1OxsbFKTk4u0nfZsmW65ppr5HQ61b9/f7lcLq9qAQAA9mEq8BiGoXr16lldix555BF9+eWX+uKLL/Tuu+9q4cKFeuONN3Ty5En16dNHN9xwg5KTk9WlSxf17dtXJ0+elCRt2bJF999/v6ZOnaqkpCRlZ2crISHB8voAAEDFZOorrenTp2v69OmKiorSDTfcoDp16sjhcHhVyLFjx/TWW2/p3//+tzp27ChJmjBhgr799lsFBgYqJCREL774ohwOh+bMmaNVq1ZpxYoVSkhI0Ny5czVo0CANGzZMkrRkyRI1aNBABw8e1NVXX+1VXQAqt7S0tDKfMU5JSfFRNQDMMhV4/va3vyk7O1uDBw++4D4Oh0Nnz54t9Wdu3LhRNWrU0I033ujZNmnSJEnSgw8+qPj4eE+ocjgc6tq1qzZv3qyEhAQlJSV59pXO3fE5JiZGSUlJBB4ApqWlpalZ8xbKP5VX9s4Ohwpys60vCoAppgJPXFyc4uLiLC0kNTVVDRs21Ntvv60ZM2bo9OnTGj58uKZMmaKMjAy1atWqyP6RkZHauXOnJCkjI6PYV2yRkZE6fPjwBX+e2+2W2+32vM/JybHwaADYgcvlUv6pPIX3m6DA8OhS9zuTla6slbNU6M71YXUAysJU4Fm0aJHVdSg3N1c//vijFixYoEWLFikjI0MjR46U0+lUXl6egoKCiuwfFBTkCSyXai9JYmKipk2bZvlxALCfwPBoBdVtXN5lAPCCqcDjC1WqVFFOTo7effddNWjQQNK508mvv/66mjRpUiy8uN1uOZ1OSVJwcPBF20syefJkPfroo573OTk5io4u/b/gAABAxWEq8FxxxRWlWqRcUFBQ6s+MiopScHCwJ+xIUrNmzZSenq7u3bsrMzOzyP6ZmZmKioqSdO5GiBdrL0lQUFCxs0IAAMCeTAWeYcOGFQs8BQUFyszM1DfffKPo6Gg98MADZfrMuLg45efna9++fWratKmkc1c6NGzYUHFxcZo5c6YMw5DD4ZBhGNq0aZOmTJni6btx40bPpejp6elKT0+3fJ0RAAComEwFnsWLF1+w7fDhw+ratWuZn5berFkz9e3bVwkJCZo3b54yMzM1c+ZMPfXUUxo4cKAmTZqk8ePHa+TIkVqwYIFOnjypQYMGSZJGjx6t7t27q3PnzoqNjdUjjzyifv36cYUWAACQZPLGgxdz1VVXaezYsZo1a1aZ+77zzjtq3Lix4uPjNWzYMD300EMaN26cwsLCtHLlSm3YsEEdOnRQUlKSVq1apdDQUElS586dtWDBAk2bNk1dunRRzZo1fbKwGgAAVEw+WbQcEhKi9PT0MverUaOG3n777RLbOnbsqG3btl2wb0JCAndXBgAAJbI08Jw5c0bfffedXn75ZTVr1szKjwYAADDNZ1dprVixwlRBAAAAVrPsKi1JCggIUN26dTV48GBde+21XhcHAABgBcuv0jrv9OnTqlq1qpmPBwDAMmYe5hoREaGYmBgfVIPyYirwNGrUSK+88opuv/32EtuXLVumcePGlfkJwwAAWKUgN1tyODR06NAy9w0OcWrvnhRCj42UKvC4XC7t3r3b8/6nn37Sli1bVKNGjWL7FhYW6uOPP1Z+fr51VQIAUEaF7lzJMEw//NXlchF4bKRUgSc4OFhDhgxRRkaGJMnhcGjGjBmaMWNGifsbhqHBgwdbVyUAACbx8FdIpQw81apV06effqodO3bIMAzdd999evDBB9W5c+di+wYEBKh27dq6+eabLS8WAADAjFKv4Wnfvr3at28vSTp06JAGDBig1q1b+6wwAAAAq5h6tMTUqVPldrt1991365dffvFsf+yxxzRw4EDt2bPHsgIBAAC8ZSrwbNy4UfHx8VqzZk2RK7GioqK0ceNGxcbG6ocffrCsSAAAAG+YCjxPP/20mjVrpv3796tly5ae7RMmTFBKSooaNWqk
SZMmWVYkAACAN0wFnu3bt2vkyJGqVatWsbaaNWtqxIgR+vbbb70uDgAAwAqmAk9gYOBFbyqYk5OjwsJC00UBAABYyVTguemmm/Tqq6/q4MGDxdqOHDmiuXPnqnv37t7WBgAAYAlTj5Z4/vnn1bFjR1177bXq06ePmjRpIofDoQMHDmjVqlWeGxMCAAD4A1OBp1mzZkpOTtaUKVO0evVqffDBB5KkkJAQ3XrrrUpMTFTz5s0tLRQAAMAsU4FHkho3bqz33ntPhmEoKytLBQUFioiIUEBAgJX1AQAAeM104DnP4XAoIiLCiloAAAB8wlTgadSoUan2S01NNfPxAAAAljIVeGJiYuRwOIpsKygoUGZmpvbv36+mTZuqZ8+elhQIAADgLVOB5+uvv75gW3Jysm677TYuSwcAAH7D1H14LqZDhw566KGH9Nxzz1n90QAAAKZYHngkqW7dutq3b58vPhoAAKDMLA88mZmZmjdvnho0aGD1RwMAAJhi6VVabrdbv/zyiwoKCvT66697VRgAAIBVLLtKS5ICAgJ000036Z577lHfvn29Lg4AAMAKpgLPa6+9ppYtW5YYegAAAPyNqTU8N998s5588kmrawEAAPAJU4Hn5MmTatiwocWlAAAA+IapwDN+/HjNnj1bW7dutboeAAAAy5law7N161YdPXpUnTp1UkhIiMLDw4s9Jd3hcOjAgQOWFAkAAOANU4Hn1KlTuv76662uBQAAwCcsf5YWAACAvzG1hqdHjx5au3btBds/++wztW7d2nRRAAAAVirVGZ68vDy5XC7P+6+//lr9+/dXkyZNiu1bWFio1atXKzU11boqAQAAvFCqwHPy5Em1a9dOJ06ckHRuQfL48eM1fvz4Evc3DEM9e/a0rEgAAABvlCrw1K5dW++88462bNkiwzD03HPPqX///mrTpk2xfQMCAlS7dm3dfffdlhcLAABgRqkXLffu3Vu9e/eWJB06dEijRo1Sp06dfFYYAACAVUxdpbVo0aISt+/atUsBAQFq3ry5V0UBAABYydRVWpI0c+ZMDR8+XNK5hcp9+/ZVmzZt1KpVK/Xq1Uu5ubmWFQkAAOANU4HnxRdf1JNPPqmff/5ZkvT+++9r9erVGjBggJ555hlt2LBBzz33nKWFAgAAmGXqK63Fixerf//++vDDDyVJ7733npxOp/7xj38oJCREubm5WrFihf76179aWiwAAIAZps7wpKamqk+fPpKkM2fOaO3aterevbtCQkIkSS1atFBmZqZ1VQIAAHjBVOCpWbOm5548X331lXJzcz0BSJL279+vyMhIayoEAADwkqmvtDp37qy5c+eqYcOGmj59ugIDA3XXXXfpzJkz+uyzzzRv3jz179/f6loBAABMMXWGZ86cOQoODtbAgQP13XffKTExUXXr1tWmTZs0cOBARUVFsWgZAAD4DVNneKKjo/XDDz9o+/btqlevnurXry9Jatu2rZYtW6bbb79dTqfT0kIBAADMMhV4JKlKlSqKjY0tsq1mzZoaPHiw10UBAABYyfSNBwEAACoKAg8AALA9Ag8AALC9UgWef/3rX57HSAAAAFQ0pQo89957r/71r3953vfo0UNr1671WVEAAABWKlXgMQxDGzZsUF5eniTp66+/1i+//OLTwgAAAKxSqsvSBwwYoMWLF+vtt9/2bBs6dKiGDh16wT4Oh0Nnz571vkIAAMpBSkqKqX4RERGKiYmxuBp4q1SBZ/78+Wrfvr127Nght9utJUuWKD4+Xo0aNfJ1fQAAXFYFudmSw3HRf9RfTHCIU3v3pBB6/EypAk/VqlX10EMPed6//fbbGjlypIYMGeKzwgAAKA+F7lzJMBTeb4ICw6PL1PdMVrqyVs6Sy+Ui8PgZU3daLiws9Pz/zz//rEOHDqlq1aq66qqrFBERYVlxAACUl8DwaAXVbVzeZcAipu/Dk5ycrLi4ONWrV0+dO3dWhw4dFBkZqS5dumjr1q1W1ggAAOAV
U2d4duzYoe7du8swDD344INq0aKFCgsLtWfPHr3zzjvq3r27vv32W7Vq1crqegEAAMrMVOB56qmnVK1aNSUlJalBgwbF2jp27Khp06bp/ffft6RIAAAAb5j6Smv9+vUaO3ZssbAjSVdddZVGjx6tr776yuviAAAArGAq8LjdblWvXv2C7WFhYZ6bFAIAAJQ3U4Hnuuuu07vvvlvijQXPnDmjd955R9dee63XxQEAAFjB1BqeJ554Qv3791e3bt00YcIENW3aVJK0Z88ezZ49W8nJyXrvvfcsLRQAAMAsU4Hnjjvu0KuvvqqJEydq0KBBnu2GYSg4OFizZs3SwIEDLSsSAADAG6YCjySNHTtW99xzj9auXauDBw/KMAw1bNhQPXv2VK1ataysEQAAwCumA48k1apVS3/84x+tqgUAAMAnTN9p2df69u2rhIQEz/vt27erU6dOcjqdio2NVXJycpH9ly1bpmuuuUZOp1P9+/eXy+W6zBUDAAB/5ZeBZ/ny5Vq1apXn/cmTJ9WnTx/dcMMNSk5OVpcuXdS3b1+dPHlSkrRlyxbdf//9mjp1qpKSkpSdnV0kLAEAgMrN7wLPsWPH9Pjjjys2Ntaz7b333lNISIhefPFFtWjRQnPmzFH16tW1YsUKSdLcuXM1aNAgDRs2TG3atNGSJUu0atUqHTx4sLwOAwAA+BFTgef3T0u32mOPPaY//elPatmypWdbUlKS4uPj5XA4JEkOh0Ndu3bV5s2bPe3dunXz7B8dHa2YmBglJSX5rE4AAFBxmAo8bdu21SuvvGJ1LVq3bp3Wr1+vp59+usj2jIwM1atXr8i2yMhIHT58uFTtJXG73crJySnyAgAA9mQq8Pz4449yOp2WFpKfn6+RI0fqtddeU0hISJG2vLw8BQUFFdkWFBQkt9tdqvaSJCYmqkaNGp5XdHS0RUcCAAD8janA06tXL3300Uc6ffq0ZYVMmzZN119/vXr16lWsLTg4uFh4cbvdntB1qfaSTJ48WSdOnPC80tPTLTgKAADgj0zdh6dt27aaM2eOoqKi1LFjR9WpU0cBAQFF9nE4HHrrrbdK/ZnLly9XZmamqlWrJkmeAPPBBx9oyJAhyszMLLJ/ZmamoqKiJEn169e/aHtJgoKCip0VAgAA9mQq8Lzwwgue///iiy9K3Kesgefrr7/WmTNnPO8nTpwoSfp//+//af369Zo5c6YMw5DD4ZBhGNq0aZOmTJkiSYqLi9PGjRs9l6Knp6crPT1dcXFxZT00AABgQ6YCjy+u0mrQoEGR99WrV5ckNW7cWHXq1NGkSZM0fvx4jRw5UgsWLNDJkyc9z/EaPXq0unfvrs6dOys2NlaPPPKI+vXrp6uvvtryOgGUn7S0NNM3FY2IiFBMTIzFFQGoKLx6tIR0Lvz8+uuvqlmzpqpWrWpFTcWEhYVp5cqVGjVqlN544w21adNGq1atUmhoqCSpc+fOWrBggZ555hkdO3ZMt956qxYuXOiTWgCUj7S0NDVr3kL5p/JM9Q8OcWrvnhRCD1BJmQ48+/fv18SJE/XFF1/o1KlT+vLLLyWdWww8a9YsxcfHe1XY4sWLi7zv2LGjtm3bdsH9ExISuLsyYGMul0v5p/IU3m+CAsPLdlXlmax0Za2cJZfLReABKilTgefHH39Up06d5HA41Lt3b3300UeSpICAAO3Zs0c9e/bUV199xRoaAJYLDI9WUN3G5V0GgArG1GXpkydPltPpVEpKiubNmyfDMCRJN954o3bv3q26detq2rRplhYKAABglqkzPOvWrdOECRNUp04dZWVlFWmrX7++xowZo7/+9a+WFAgAVklJSfHp/gD8l6nA43a7VbNmzQu2V61aVadOnTJdFABYqSA3W3I4NHTo0LJ3djjO9QdQoZkKPO3atdOnn36qMWPGFGs7e/asli5dqjZt2nhdHABYodCdKxlGmRc8n1/sXOjO9WF1AC4HU4Fn8uTJuuOOOzR06FDdcccdkqSf
fvpJn376qV588UVt27ZN77//vqWFAoC3WPAMVF6mAk+/fv301ltvafz48Vq2bJkkacSIETIMQ8HBwZo1a5YGDBhgaaEAAABmmb4PT0JCggYMGKA1a9YoNTVVBQUFatiwoXr27Knw8HArawQAAPCKV3darl69uu666y79+uuvCggIIOgAAAC/ZOo+PNK5yzX/+Mc/qkaNGoqKilKdOnVUq1YtDR8+XIcPH7ayRgAAAK+YOsOzdetW3XTTTXK73erTp4+uueYaGYahvXv3aunSpVq9erU2bdqka665xup6AQAAysxU4Jk4caLCwsK0fv36YqFm586duummm/TYY4/p448/tqRIAAAAb5j6SispKUmPPPJIiWdwWrdurUceeURr1671ujgAAAArmDrDU7NmTZ09e/aC7dWrV1dISIjpogAAqMjMPJYkIiJCMTExPqgGksnAM3bsWM2ePVt33nmnWrZsWaTt6NGj+tvf/qYHH3zQkgIBAKgovHmMSXCIU3v3pBB6fKRUgee+++4rti0/P1/t2rVT79691axZMzkcDv30009avXq1goODLS8UAAB/5+1jTFwuF4HHR0oVeBYvXnzBts8++0yfffZZkW25ubmaMWOGnn/+ea+KAwCgIuIxJv6nVIGnsLDQ13UAAAD4jOkbDwIAAFQUph8tsWTJEq1Zs0YZGRklngFyOBxcmg4AAPyCqcAzZcoUJSYmqmrVqqpTp44CAgKsrgsAAMAypgLPP/7xD/Xq1UsffvihnE6n1TUBAABYytQanpycHA0YMICwAwAAKgRTgee2227TunXrrK4FAADAJ0x9pfXqq6/qlltu0b333qs777xTderUkcPhKLZft27dvC4QAADAW6YCT1pamk6cOKFly5Zp+fLlxdoNw5DD4VBBQYHXBQIAAHjL9LO0jh8/rscff1xNmzZVlSqmr24HAADwOVNJZefOnXr22Wf1xBNPWF0PAACA5UwtWo6OjtYVV3CTZgAAUDGYSi1PPPGE5syZo927d1tdDwAAgOVMfaX13XffyeFwqE2bNrrmmmsUGRlZbB0Pj5YAAAD+wlTgWblypapUqaLo6GidPn1a6enpVtcFAABgGVOB5+DBg1bXAQAA4DOsPAYAALZn6gxPjx49SrUfj58AAAD+wFTgSU1NLfYoiYKCArlcLuXn56thw4Zq3bq1JQUCAAB4y1Tg+emnn0rcXlBQoE8++UQPPPCAHnvsMW/qAgAAsIyla3gCAgJ01113acSIEZo4caKVHw0AAGCaTxYtN2nSRN9//70vPhoAAKDMLA88brdbS5cuVZ06daz+aAAAAFMsvUrL7XZr7969ys7O1rRp07wqDAAAwCqWXaUlnVvD07x5c91zzz0aM2aM18UBAABYwdKrtAAAAPwRd1oGAAC2V6ozPM8995ypD3/mmWdM9QMAALBSqQLPs88+W6oP+991PQQeAADgD0oVeErzdPScnBxNmTJFK1euVJUqVTR+/HhvawMAALBEqQJPgwYNLtr+/vvv69FHH9XRo0fVtWtXzZs3j2dpAQAAv+HVouXU1FT17t1b99xzj9xut958801t2LCBsAMAAPyKqcvSz5w5o5kzZ2rmzJnKz89XQkKC/vrXvyo8PNzq+gAAqDRSUlLK3CciIkIxMTE+qMZeyhx4vvrqK40ZM0Z79+5Vq1atNG/ePMXHx/uiNgAAKoWC3GzJ4dDQoUPL3Dc4xKm9e1IIPZdQ6sDz66+/6i9/+YuWLVumkJAQzZw5U48++qiqVDF1kggAAPyfQneuZBgK7zdBgeHRpe53JitdWStnyeVyEXguoVRpZf78+ZoyZYqOHz+uP/zhD/rb3/6m6OjS/4EAAIBLCwyPVlDdxuVdhi2VatHymDFjlJ2dLcMw9Omnn6phw4YKCAi46IszPwAAwF+UKpUMGzasxIeFAgAAVASlCjyLFy/2cRkAAAC+w8NDAQCA7RF4AACA7RF4AACA7RF4AACA7RF4AACA
7RF4AACA7RF4AACA7XE7ZACXXVpamlwuV5n6mHmKNACcR+ABcFmlpaWpWfMWyj+VV/bODse5p0oDQBkReABcVi6XS/mn8kw/FbrQnevD6gDYFYEHQLngqdAALicWLQMAANsj8AAAANvzq8Bz5MgRDRw4ULVq1VL9+vX16KOPKj8/X5J08OBB3XLLLQoNDVXLli21Zs2aIn3//e9/q3Xr1nI6nerRo4dSU1PL4xAAAIAf8pvAYxiGBg4cqLy8PG3YsEHLly/XZ599pqefflqGYejOO+9U3bp1tXXrVv3pT39S//79lZaWJuncVR933nmnhg8frv/+97+qXbu27rzzThmGUc5HBQAA/IHfLFreu3evkpKSlJmZqcjISEnSc889p8cee0y9e/fWgQMH9M033yg0NFQtWrTQ2rVr9fe//13PPvus3nzzTV1//fWaMGGCJGnRokWqW7eu/vOf/6h79+7leFQAAMAf+M0Znrp16+rzzz/3hJ3zTpw4oaSkJLVv316hoaGe7fHx8dq8ebMkKSkpSd26dfO0OZ1OtW/f3tMOAAAqN785w3PllVeqV69enveFhYWaO3eubr75ZmVkZKhevXpF9o+MjNThw4cl6ZLtJXG73XK73Z73OTk5VhwGAADwQ35zhud/PfHEE9q2bZumT5+uvLw8BQUFFWkPCgryBJZLtZckMTFRNWrU8Lyio0t/AzQAAFCx+GXgmThxoubMmaOlS5eqdevWCg4OLhZe3G63nE6nJF2yvSSTJ0/WiRMnPK/09HTrDwQAAPgFv/lK67xx48Zp3rx5Wrp0qQYMGCBJql+/vnbt2lVkv8zMTEVFRXnaMzMzi7W3a9fugj8nKCio2FkhAABgT351hmfatGmaP3++li9frrvvvtuzPS4uTtu2bdOpU6c82zZu3Ki4uDhP+8aNGz1teXl52r59u6cdAABUbn4TeFJSUvT8889r0qRJio+PV2Zmpud14403Kjo6WsOHD9euXbs0c+ZMbdmyRffff78k6b777tOmTZs0c+ZM7dq1S8OHD9fVV1/NJekAAECSHwWeTz75RAUFBXrhhRcUFRVV5BUQEKBPPvlEGRkZ6tChg5YuXaqPP/5YMTExkqSGDRvqo48+0qJFixQbG6usrCz985//lMPhKOejAgAA/sBv1vBMmjRJkyZNumB748aN9Z///OeC7b1791bv3r19URoAAKjg/OYMDwAAgK8QeAAAgO0ReAAAgO0ReAAAgO0ReAAAgO0ReAAAgO35zWXpACqetLQ0uVyuMvVJSUnxUTUAcGEEHgCmgktGRoYGDBgotzu/7D/Q4VBBbnbZ+wGASQQeoJJLS0tTs+YtlH8qr+ydHQ5defMIBV/VqtRdzmSlK2vlLBW6c8v+8wDAJAIPUMm5XC7ln8pTeL8JCgyPLnW/88ElICRMQXUb+7BCAPAegQeAJCkwPJrgAsC2uEoLAADYHoEHAADYHoEHAADYHoEHAADYHoEHAADYHoEHAADYHoEHAADYHoEHAADYHoEHAADYHoEHAADYHoEHAADYHoEHAADYHoEHAADYHoEHAADYHoEHAADYHoEHAADYHoEHAADYHoEHAADYHoEHAADYHoEHAADYHoEHAADYHoEHAADYHoEHAADYHoEHAADYHoEHAADYHoEHAADYHoEHAADYXpXyLgCAddLS0uRyucrUJyUlxUfVAID/IPAANpGWlqZmzVso/1Re2Ts7HCrIzba+KADwEwQewCZcLpfyT+UpvN8EBYZHl7rfmax0Za2cpUJ3rg+rA4DyReAB/JA3X00FhkcrqG5jX5QFABUWgQfwM3w1BQDWI/AAfoavpgDAegQewE/x1RQAWIf78AAAANsj8AAAANsj8AAAANsj8AAAANsj8AAAANsj8AAAANsj8AAAANsj8AAAANsj8AAAANvjTstAKZh5mKckRUREKCYmxgcVAQDKgsADXII3D/MMDnFq754UQg8AlDMCD3AJ3j7M0+VyEXgA
oJwReIBS4mGeAFBxsWgZAADYHoEHAADYHl9pAT6WkpLi0/0BAJdG4AF8pCA3W3I4NHTo0LJ3djjO9QcAWILAA/hIoTtXMgzTV3cVunN9WB0AVC4EHsDHuLoLAMofgQeVipk7JrOmBoAdVbY7yBN4UGl4c8dk1tQA8Gdl/YdZRkaGBgwYKLc7v8w/q6LeQZ7Ag0rD2zsms6YGgL/x9uKIK28eoeCrWpW6S0W+gzyBB5UOa2oA2IW3F0cEhIRVmr8PCTwAAFRw/EPu0mwVePLz8zV27Fh9+OGHCgkJ0WOPPaYJEyaUd1kAANiKmYs5ynuxs60Cz+OPP66tW7dq3bp1OnTokP785z+rQYMGGjhwYHmXBgBAhefNmqHyXuxsm8Bz8uRJvfnmm1q9erXat2+v9u3ba9euXZo7dy6Bx4a4vBwALj9v1wyV52Jn2wSe77//XmfOnFGXLl082+Lj4zV9+nQVFhbqiivK7zmple1eB2VhZmy8uZySy8sBwHsVcc2QbQJPRkaGIiIiVLVqVc+2yMhI5efnKysrS7Vr1y6yv9vtltvt9rw/ceKEJCknJ8fSutLT09Xh+li580+VuW9QcIiSt/5X0dGlT9EViTdjI0nV2vZSYO2Gpd6/4MSvyvnvR3IfTZGjakip+505dvjcf7PSdSptZ+n7ZaVLktyZ+1V4uvThzPTPo9+F+1aUP4vLXafZn2f3Ok32K4+fWWH+LP7v5+Xm5lr+e/b85xmGcfEdDZt4++23jZiYmCLbDhw4YEgy0tPTi+0/depUQxIvXrx48eLFywavkn7X/55tzvAEBwcXOWMjyfPe6XQW23/y5Ml69NFHPe8LCwt17NgxhYeHy+FwlOln5+TkKDo6Wunp6QoLCzNRvT0xLhfG2JSMcbkwxqZkjMuFVZaxMQxDv/32m+rVq3fR/WwTeOrXry+Xy6WzZ8+qSpVzh5WZmamQkBBdeeWVxfYPCgpSUFBQkW0l7VcWYWFhtp5UZjEuF8bYlIxxuTDGpmSMy4VVhrGpUaPGJfcpv5W8FmvXrp0CAwOVlJTk2bZx40bFxsaW64JlAABQ/mxzhsfpdOrPf/6zRo0apUWLFunIkSN66aWXtGjRovIuDQAAlDPbBB5Jmj17tkaPHq2bbrpJNWrU0LRp03TXXXf5/OcGBQVp6tSpxb4iq+wYlwtjbErGuFwYY1MyxuXCGJuiHIZxqeu4AAAAKjYWtwAAANsj8AAAANsj8AAAANsj8FyA2+1W69at9fXXX3u2JScnq3PnzqpWrZri4uKKXAIvSUuWLFGzZs0UFham/v37KzMzs0j7nDlzVL9+fVWvXl3333+/8vLyLsehWM7qscnOzpbD4SjyioiIuFyH47UjR45o4MCBqlWrlurXr69HH31U+fnnbrl+8OBB3XLLLQoNDVXLli21Zs2aIn3//e9/q3Xr1nI6nerRo4dSU1OLtFfkOeOrcano80XybmzOe+edd9S9e/di2yvrnDmvpHGp7HNm0aJFat68uapVq6ZOnTpp06ZNRdor8pwpE+se7mAfp06dMvr3729IMr766ivDMAzj559/NmrUqGGMGDHCSElJMWbPnm1Uq1bNOHTokGEYhvH5558bAQEBxquvvmqkpKQYQ4cONdq1a2cUFBQYhmEYH3zwgVGjRg3js88+M7Zs2WK0bNnSGDt2bHkdomm+GJuNGzca4eHhRkZGhuf1888/l9chlklhYaERFxdn9O7d29i5c6exfv16o3HjxsZjjz1mFBYWGm3atDHuvfdeY/fu3caMGTMMp9PpGZdDhw4ZoaGhxksvvWTs3LnTGDRokHHttdcahYWFhmFU7Dnjy3GpyPPFMLwbm/PWrVtnOJ1O48YbbyyyvbLOmfMuNC6Vec6sXr3aCAkJMZYuXWr8+OOPxlNPPWWEhYUZR44cMQyjYs+ZsiLw/I9du3YZbdu2Ndq0aVPkl/qLL75oNGrUyDh79qxn39tuu82YNGmSYRiG0bdvX2PY
sGGetry8PKNWrVrG559/bhiGYdxwww3G1KlTPe0bNmwwQkJCjJMnT/r+oCziq7FZuHCh0blz58t3IBZKSUkxJBmZmZmebe+++65Rr149Y+3atUZoaKiRm5vrabv55ps98+Dpp58u8hfzyZMnjerVq3vGtSLPGV+OS0WeL4bh3dgYhmE8++yzRlBQkNG6detiv9gr65wxjIuPS2WeM4MHDzZGjRpV5POaNm1qvPHGG4ZhVOw5U1Z8pfU//vOf/+imm27S5s2bi2xPTU1Vhw4dFBAQ4NnWpk0bz36pqanq1KmTpy0kJESNGzfW5s2bVVBQoP/+97/q1q2bpz0uLk6nT5/W999/7+Mjso4vxkaSdu/eraZNm16GI7Be3bp19fnnnysyMrLI9hMnTigpKUnt27dXaGioZ3t8fLznuJOSkorMCafTqfbt29tizvhqXKSKPV8k78ZGkr788kt98cUXGjBgQJH+lXnOSBceF6lyz5knnniiyHMjf9+3os+ZsrLVjQetMHr06BK3R0ZGFpsA6enpcrlcnvYjR4542goLC3XkyBG5XC4dP35c+fn5RR5sVqVKFYWHh+vw4cM+OArf8MXYSFJKSorOnDmjjh076siRI7rhhhv08ssvKyoqykdHYp0rr7xSvXr18rwvLCzU3LlzdfPNNysjI6PYw+wiIyM9f+YXa6/oc8ZX4yJV7PkieTc20rlH5kjSV199VWS/yjxnpAuPi1S550z79u2LtH3++efat2+fevToUeHnTFlxhqeUBgwYoG+//VYLFy7U2bNn9cUXX+iTTz7R6dOnJUmDBw/WvHnztHnzZp05c0YzZszQzz//rNOnT3sWgP3v3S6DgoKKPeG9IvJmbCRpz549ysnJ0csvv6z33ntPR48eVb9+/VRQUFCeh2XKE088oW3btmn69OnKy8u76J/5xdrtNmesGhfJXvNFKtvYXExlnjOXwpw558CBA0pISNC9996r9u3b227OXApneEqpdevWWrhwoR5++GGNGjVK7dq105gxYzz/mhgxYoR27NihG264QZI0cOBA9enTR2FhYQoODpakYhPI7XbL6XRe3gPxAW/GRpJ27dolh8OhkJAQSdIHH3ygqKgoffvtt+rSpUv5HJQJEydO1Jw5c/Tee++pdevWCg4OVlZWVpF9fv9nHhwcXOKcuPLKK201Z6wcF8k+80Uq+9hcTGWeM5fCnJH27dunW265Rddcc40WLlwoyV5zpjQ4w1MGw4cP1/Hjx3X48GElJyfL4XCoYcOGkqSAgAC99tprysnJ0S+//KLly5crIyNDDRs2VHh4uIKDg4tcin327FllZWVVmFOql2J2bKRzazTO/0UkSXXq1FF4eHiRr8H83bhx4zRr1iwtXbrUs4agfv36xW5NkJmZ6fkzv1i7XeaM1eMi2WO+SObG5mIq85y5lMo+Z3bt2qVu3brpqquu0urVqz1jYZc5U1oEnlL66quvdPfddysgIEBRUVEyDEOrV6/WTTfdJEl6+eWXNXPmTDmdTtWqVUsZGRnavn27unfvriuuuEKxsbGe75glafPmzQoMDFTbtm3L65As483Y5OTkqGbNmkW+dz+/vqd58+bldUhlMm3aNM2fP1/Lly/X3Xff7dkeFxenbdu26dSpU55tGzduVFxcnKf993MiLy9P27dvV1xcnC3mjC/GxQ7zRTI/NhdTmefMxVT2OZORkaFbb71VTZo00Zo1azxn1iV7zJkyKe/LxPyZfnfp9eHDhw2n02m8/vrrxoEDB4zRo0cb9evXN3777TfDMAzj448/Nq688kpj3bp1xs6dO40uXboYd9xxh+ezli1bZoSFhRkff/yxsWXLFqNVq1bGuHHjyuGorGHl2Nx+++1G27ZtjS1bthjJyclGfHy80bt373I4qrLbvXu3ERAQYDz11FNF7vGRkZFhnD171mjZsqUxePBgY+fOnUZiYmKR+xMdPHjQCA4ONhITEz33m2nTpo3nfjMVec74
clwq8nwxDO/G5vemTp1a7PLryjpnfq+kcanMc2bIkCFGZGSksXfv3iL9zv/9XJHnTFkReC7i97/UDcMwVq5caTRv3txwOp1Gjx49jJSUlCL7z5gxw4iKijKuvPJKIyEhwcjJySnSnpiYaNSpU8eoUaOGcd999xmnTp26HIfhE1aOzbFjx4zhw4cbERERRvXq1Y2hQ4cax44du1yH4pXExERDUokvwzCMH3/80ejWrZsRFBRktGrVyvjyyy+L9F+1apXRtGlTIyQkxLj55puN1NTUYp9fEeeML8elIs8Xw/B+bM4r6Rf7+c+vjHPmvJLGpbLOmcLCQiMkJKTEfr+/905FnTNl5TAMw/DhCSQAAIByxxoeAABgewQeAABgewQeAABgewQeAABgewQeAABgewQeAABgewQeAABgewQeAABgewQeAH5r8ODBcjgcmjdv3gX3eeONN+RwOPTwww9fxsoAVDTcaRmA3zp69KhatGihgIAA7d27V7Vr1y7S/uuvv6p58+YKDQ3V7t27Va1atXKqFIC/4wwPAL9Vr149Pf/888rOztbEiROLtT/++OM6duyY5s2bR9gBcFGc4QHg1woKCtSxY0dt375dGzduVJcuXSRJGzZsULdu3TR48GAtX768nKsE4O84wwPArwUEBGj+/PlyOBwaO3asCgsLVVBQoLFjx6pWrVp65ZVXJEnZ2dkaN26c6tevr6CgILVo0UKvvPKK/vffdNu2bdOAAQMUGRmpwMBA1alTR0OGDNHhw4c9+zz77LMKDg7Wxx9/rLp166p69ep66623LutxA7BWlfIuAAAuJTY2VqNGjdLrr7+ut99+W3l5edqxY4f+/ve/KzIyUidPnlS3bt2Unp6uMWPGKDo6WuvWrdP48eO1b98+vfbaa5KkHTt2KD4+Xk2aNNHkyZPldDq1adMmLVmyRPv379eWLVs8P/PMmTMaOXKkJkyYoPz8fMXHx5fX4QOwggEAFcDx48eNunXrGvXr1zfq1Klj9OjRw9M2depUo2rVqsYPP/xQpM/kyZMNScZ3331nGIZhjBo1ynA6nUZWVlaR/e6++25Dkmf71KlTDUnGzJkzfXxUAC4XvtICUCHUqFFDs2fP1pEjR5STk6MFCxZ42j788EO1bt1aUVFRcrlcntedd94pSVq5cqUk6fXXX9dPP/2kWrVqefrm5OQoODhYkpSbm1vkZ3br1s3HRwXgcuErLQAVxj333KMhQ4aoU6dOaty4sWf7gQMHdOrUqWKXrZ+XlpYmSXI4HMrKylJiYqJ++OEHHThwQIcOHfKs8yksLCzSr06dOj46EgCXG4EHQIVXUFCg+Ph4TZ06tcT2evXqSZLef/99DRkyRPXq1VOPHj3Uu3dvXX/99friiy+UmJhYrF9AQIBP6wZw+RB4AFR4DRs21G+//aZbbrmlyPbs7GytXbtWTZo0kSRNmjRJTZo00datWxUaGurZ75133rms9QK4/FjDA6DC+8Mf/qDvv/9eq1atKrL9hRde0B//+Eft3LlTkpSVlaUGDRoUCTvp6en66KOPJElnz569fEUDuKw4wwOgwps8ebI+/PBD9e/fX6NGjVKrVq20ceNGLVmyRL1791bv3r0lSb1799Z7772nUaNGKTY2VqmpqVq4cKFOnjwpSfrtt9/K8zAA+BCBB0CFV6tWLW3evFnPPPOMVqxYoQULFigmJkZPP/20Jk2apCuuOHcy+/wjKD755BO9/fbbio6O1rBhw3TXXXepa9euWrduna677rpyPhoAvsCjJQAAgO2xhgcAANgegQcAANgegQcAANgegQcAANgegQcAANgegQcAANgegQcAANgegQcAANgegQcAANgegQcAANgegQcAANgegQcAANje/wc3Be9ErkACqAAAAABJRU5ErkJggg==",
      "text/plain": [
       "<Figure size 640x480 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Bar chart of the number of retained records per year (one unit-width bar per year).\n",
    "# Setting rcParams changes the font for every later figure in this kernel as well.\n",
    "plt.rcParams['font.family'] = 'arial'\n",
    "plt.bar(years, counts, width=1, linewidth=1, edgecolor='k')\n",
    "plt.ylabel('Number of structures', fontsize=13)\n",
    "plt.xlabel('Year', fontsize=13)\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Parse data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [],
   "source": [
    "from collections import namedtuple\n",
    "import pprint\n",
    "from word2number import w2n\n",
    "import regex"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Attribute names exported by Recipt.to_dict, in output order.\n",
    "field = (\n",
    "    'precursors',\n",
    "    'synthesis_method',\n",
    "    'solvent',\n",
    "    'temperature',\n",
    "    'time',\n",
    "    'pressure',\n",
    "    'cooling',\n",
    "    'pH_adjustment',\n",
    "    'washing',\n",
    "    'filtration',\n",
    "    'drying',\n",
    ")\n",
    "\n",
    "\n",
    "class Recipt:\n",
    "    \"\"\"Record of one synthesis recipe; each attribute starts at its class-level default.\"\"\"\n",
    "\n",
    "    # Attributes that default to None.\n",
    "    precursors = synthesis_method = solvent = None\n",
    "    # ratio = None\n",
    "    temperature = time = pressure = None\n",
    "    # Boolean attributes that default to False.\n",
    "    cooling = pH_adjustment = washing = filtration = drying = False\n",
    "\n",
    "    def to_dict(self):\n",
    "        \"\"\"Return the attributes named in ``field`` as a dict, in that order.\"\"\"\n",
    "        snapshot = {}\n",
    "        for key in field:\n",
    "            snapshot[key] = getattr(self, key)\n",
    "        return snapshot\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [],
   "source": [
    "# NOTE(review): `a` is not referenced by any other cell visible in this part of the notebook;\n",
    "# presumably a leftover iterator for stepping through records by hand - confirm before removing.\n",
    "a = iter(range(5000))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Hand-written override tables: raw text -> replacement text in a normalised spelling.\n",
    "# Time strings.\n",
    "time_exception = {\n",
    "    'half an hour': '30 min',\n",
    "    '0.5 h': '30 min',\n",
    "    '0.5-h': '30 min',\n",
    "    '120-h': '120 hours',\n",
    "    '1 hour 45 minutes': '105 min',\n",
    "}\n",
    "\n",
    "# Temperature strings; looked up by parse_temperature before any unit handling.\n",
    "# Qualitative phrases are mapped to fixed values (e.g. 'hot' -> '100 °C').\n",
    "temperature_exception = {\n",
    "    '36 °C for 2 days, and then at 75 °C for another 3 days': '75 °C',\n",
    "    'mild heating': '50 °C',\n",
    "    'boiling': '100 °C',\n",
    "    'refrigerator temperature': '0 °C',\n",
    "    'hot': '100 °C',\n",
    "    '100 C': '100 °C',\n",
    "    'moderately heated': '50 °C'\n",
    "}\n",
    "\n",
    "# Pressure strings, all expressed in atm. The values are coarse roundings rather than\n",
    "# exact unit conversions (e.g. '3 bar' -> '3 atm', 'ca. 1.1 MPa' -> '10 atm').\n",
    "pressure_exception = {\n",
    "    'ca. 1.1 MPa': '10 atm',\n",
    "    '~10-3 Torr': '0 atm',\n",
    "    'reduced': '0 atm',\n",
    "    '2.2 atm': '2.2 atm',\n",
    "    '3 bar': '3 atm',\n",
    "    '30psi': '2 atm'\n",
    "\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [],
   "source": [
    "def parse_number(number):\n",
    "    \"\"\"Convert a numeric phrase to an int or a float.\n",
    "\n",
    "    Approximation markers ('ca.', '~', 'about', '± <error>') are removed first. The\n",
    "    remainder is then tried, in order, as an int, a float, an English number word\n",
    "    (``w2n.word_to_num``), the literal 'half' / 'half-an-', and finally a two-ended\n",
    "    range written with '-', 'to' or 'or', which is reduced to its midpoint.\n",
    "\n",
    "    Args:\n",
    "        number: string to interpret.\n",
    "\n",
    "    Returns:\n",
    "        int or float.\n",
    "\n",
    "    Raises:\n",
    "        ValueError: if none of the interpretations applies.\n",
    "    \"\"\"\n",
    "    # The right-hand context is a lookahead so the character after the marker is kept:\n",
    "    # with a consuming group, '~120' lost its first digit and was read as 20.\n",
    "    number = regex.sub(r\"(^|\\s)(ca\\.|~|about|±\\s?(\\d|\\.)+)(?=\\s|\\d|$)\", \"\", number).strip()\n",
    "\n",
    "    for convert in (int, float, w2n.word_to_num):\n",
    "        try:\n",
    "            return convert(number)\n",
    "        except Exception:\n",
    "            continue\n",
    "\n",
    "    if number in ['half-an-', 'half']:\n",
    "        return 0.5\n",
    "\n",
    "    # Only the first separator that occurs is tried, in this order.\n",
    "    for separator in ('-', 'to', 'or'):\n",
    "        if separator in number:\n",
    "            try:\n",
    "                low, high = number.split(separator)\n",
    "                # float() rather than int() so that ranges such as '1.5-2' are accepted too.\n",
    "                return (float(low) + float(high)) / 2\n",
    "            except ValueError:\n",
    "                break\n",
    "\n",
    "    raise ValueError(f'cannot parse a number from {number!r}')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [],
   "source": [
    "def parse_temperature(temperature: str, syn_method: str):\n",
    "    temperature = regex.sub(r\"(approx\\.|approximately|~)\", \"\", temperature).strip()\n",
    "    temperature = temperature_exception.get(temperature, temperature)\n",
    "\n",
    "    if \"°C\" in temperature:\n",
    "        temperature = temperature.replace(\"°C\", \"\").strip()\n",
    "        temperature = parse_number(temperature)\n",
    "        temperature = f\"{temperature} °C\"\n",
    "    elif '℃' in temperature:\n",
    "        temperature = temperature.replace(\"°C\", \"\").strip()\n",
    "        temperature = parse_number(temperature)\n",
    "        temperature = f\"{temperature} °C\"\n",
    "    elif \"°F\" in temperature:\n",
    "        raise ValueError()\n",
    "    elif \"°\" in temperature:\n",
    "        temperature = temperature.replace(\"°\", \"\").strip()\n",
    "        temperature = parse_number(temperature)\n",
    "        temperature = f\"{temperature} °C\"\n",
    "    elif \"K\" in temperature:\n",
    "        temperature = parse_number(temperature.replace(\"K\", \"\").strip())\n",
    "        temperature = f\"{temperature - 273} °C\"\n",
    "    elif temperature in ['room temperature', 'air atmosphere', 'ambient temperature', 'reflux', 'refluxed', 'ambient', 'ambient conditions']:\n",
    "        temperature = '25 °C'\n",
    "    elif not temperature:\n",
    "        if syn_method in ['solvothermal synthesis', 'hydrothermal synthesis']:\n",
    "            temperature = None\n",
    "        else:\n",
    "            temperature = '25 °C'\n",
    "    else:\n",
    "        print ('temperature:', temperature)\n",
    "        temperature = None\n",
    "\n",
    "    return temperature\n",
    "\n",
    "def parse_pressure(pressure: str, syn_method: str):\n",
    "    pressure = pressure.strip()\n",
    "\n",
    "    if pressure in ['autogenous', 'autogeous',  'auto-generated', 'autogenerated', 'self-generated', 'autogenious', 'autogenously atmospheric pressure', 'autogeneous', 'sealed', 'autogenously applied', 'autogenously', 'autogenous pressure', 'auto-genous']:\n",
    "        pressure = 'autogenous'\n",
    "    elif pressure in ['room pressure', 'air', 'air atmosphere', 'ambient']:\n",
    "        pressure = '1 atm'\n",
    "    elif pressure in ['almost vacuum', 'vacuume', 'near-vacuum', 'vacuum', ]:\n",
    "        pressure = '0 atm'\n",
    "    elif not pressure:\n",
    "        if syn_method in ['solvothermal synthesis', 'hydrothermal synthesis']:\n",
    "            pressure = 'autogenous'\n",
    "        else:\n",
    "            pressure = '1 atm'\n",
    "    elif pressure in pressure_exception:\n",
    "        pressure = pressure_exception['pressure']\n",
    "    else:\n",
    "        print ('pressure: ', pressure)\n",
    "        pressure = None\n",
    "\n",
    "    return pressure\n",
    "\n",
    "def parse_time(time: str):\n",
    "    time = time_exception.get(time, time)\n",
    "    \n",
    "    unit = regex.search(r\"(?<=\\b|\\d)(days|day|weeks|week|hours|hour|h|minute|min|s)\\b\", time)\n",
    "    if not unit:\n",
    "        return None\n",
    "    unit = unit.group()\n",
    "    time = time.replace(unit, \"\").strip()\n",
    "    if time == 'several':\n",
    "        time = '4'\n",
    "    elif time == 'a few':\n",
    "        time = '2'\n",
    "    elif time == 'a couple of':\n",
    "        time = '2'\n",
    "    elif time == 'an':\n",
    "        time = '1'\n",
    "\n",
    "    time = parse_number(time)\n",
    "    time = f'{time} {unit}'\n",
    "\n",
    "    return time\n",
    "\n",
    "def parse_cooling(cooling, syn_method):\n",
    "    if cooling:\n",
    "        return True\n",
    "    elif syn_method in ['solvothermal synthesis', 'hydrothermal synthesis']:\n",
    "        return True\n",
    "    else:\n",
    "        return False\n",
    "\n",
    "def parse_solvent(operation):\n",
    "    if 'solution' in operation:\n",
    "        solvents = operation['solution']\n",
    "    elif 'solvent' in operation:\n",
    "        solvents = operation['solvent']\n",
    "    else:\n",
    "        return None\n",
    "\n",
    "    sol_ls = []\n",
    "    for sol in solvents:\n",
    "        sol = regex.split(r\"\\band\\b|\\,|\\/\", sol['name'])\n",
    "        for i in sol:\n",
    "            i = i.strip()\n",
    "            i = solvent_dict.get(i, None)\n",
    "            if not i:\n",
    "                continue\n",
    "            elif ',' in i:\n",
    "                sol_ls.extend(i.split(','))\n",
    "            else:\n",
    "                sol_ls.append(i)\n",
    "\n",
    "    sol_ls = list(set(sol_ls))\n",
    "\n",
    "\n",
    "    return sol_ls\n",
    "\n",
    "def parse_washing(washing_solution):\n",
    "    if not washing_solution:\n",
    "        return False\n",
    "    sol = regex.split(r\"\\band\\b|\\,|\\/\", washing_solution)\n",
    "    sol_ls = []\n",
    "    for i in sol:\n",
    "        i = i.strip()\n",
    "        i = solvent_dict.get(i, None)\n",
    "        if not i:\n",
    "            continue\n",
    "        elif ',' in i:\n",
    "            sol_ls.extend(i.split(','))\n",
    "        else:\n",
    "            sol_ls.append(i)\n",
    "    sol_ls = list(set(sol_ls))\n",
    "    return sol_ls\n",
    "\n",
    "\n",
    "def parse_dataset(process):\n",
    "    data = Recipt()\n",
    "\n",
    "    for operation in process:\n",
    "        syn_method = operation['synthesis method']\n",
    "        if syn_method in ['chemical synthesis', 'solvothermal synthesis', 'sonochemical synthesis', 'hydrothermal synthesis']:\n",
    "            data.precursors = [i['name'] for i in operation['precursor']]\n",
    "\n",
    "            data.solvent = parse_solvent(operation)\n",
    "            data.synthesis_method = syn_method\n",
    "            data.temperature = parse_temperature(operation['temperature'], syn_method)\n",
    "            data.pressure = parse_pressure(operation['pressure'], syn_method)\n",
    "            data.time = parse_time(operation['time'])\n",
    "            data.cooling = parse_cooling(operation.get('cooling rate'), syn_method)\n",
    "\n",
    "            pass\n",
    "        elif syn_method == 'filtration':\n",
    "            data.filtration = True\n",
    "        elif syn_method == 'washing':\n",
    "            data.washing = parse_washing(operation['washing solution'])\n",
    "        elif syn_method == 'pH adjustment':\n",
    "            data.pH_adjustment = operation['pH']\n",
    "        elif syn_method == 'drying':\n",
    "            data.drying = True\n",
    "    \n",
    "    return data\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[{'synthesis method': 'solvothermal synthesis',\n",
      "  'precursor': [{'name': 'H2MDP', 'amount': '0.020', 'unit': 'g'},\n",
      "                {'name': 'ZnNO3⋅6H2O', 'amount': '0.297', 'unit': 'g'}],\n",
      "  'solvent': [{'name': 'distilled water', 'amount': '10', 'unit': 'mL'}],\n",
      "  'pressure': '',\n",
      "  'temperature': '180°C',\n",
      "  'time': '18 h',\n",
      "  'heating rate': '',\n",
      "  'cooling rate': 'slowly to 25°C'}]\n",
      "------------------------------\n",
      "{'precursors': ['H2MDP', 'ZnNO3⋅6H2O'],\n",
      " 'synthesis_method': 'solvothermal synthesis',\n",
      " 'solvent': ['water'],\n",
      " 'temperature': '180 °C',\n",
      " 'time': '18 h',\n",
      " 'pressure': 'autogenous',\n",
      " 'cooling': True,\n",
      " 'pH_adjustment': False,\n",
      " 'washing': False,\n",
      " 'filtration': False,\n",
      " 'drying': False}\n"
     ]
    }
   ],
   "source": [
    "# Test\n",
    "i = 8416\n",
    "process = sorted_output[i]['processes']\n",
    "pprint.pprint(process, sort_dicts=False)\n",
    "print ('------------------------------')\n",
    "pprint.pprint(parse_dataset(process).to_dict(), sort_dicts=False)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Make Prompt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "metadata": {},
   "outputs": [],
   "source": [
    "import json\n",
    "from sklearn.model_selection import train_test_split"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "('You act like a MOF synthesis expert. I will give you precursors of MOF and '\n",
      " 'you have to suggest the appropriate synthesis conditions for this MOF. You '\n",
      " 'have to suggest the synthesis conditions in JSON format and contain these '\n",
      " \"categories : ['precursor', 'synthesis_method', 'solvent', 'temperature', \"\n",
      " \"'time', 'pressure', 'cooling', 'pH_adjustment', 'washing', 'filtration', \"\n",
      " \"'drying'].\")\n"
     ]
    }
   ],
   "source": [
    "PROMPT = \"\"\"You act like a MOF synthesis expert. I will give you precursors of MOF and you have to suggest the appropriate synthesis conditions for this MOF. You have to suggest the synthesis conditions in JSON format and contain these categories : ['precursor', 'synthesis_method', 'solvent', 'temperature', 'time', 'pressure', 'cooling', 'pH_adjustment', 'washing', 'filtration', 'drying'].\"\"\"\n",
    "pprint.pprint(PROMPT)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 9197/9197 [00:00<00:00, 38769.64it/s]\n"
     ]
    }
   ],
   "source": [
    "jsonl = []\n",
    "year = []\n",
    "# solvent_count = Counter()\n",
    "for i, data in tqdm(enumerate(sorted_output), total=len(sorted_output)):\n",
    "    process = data['processes']\n",
    "    try:\n",
    "        process = parse_dataset(process)\n",
    "        # solvent_count.update(process.solvent)\n",
    "    except Exception:\n",
    "        continue\n",
    "\n",
    "    json_file = {\n",
    "        \"messages\": [\n",
    "            {\"role\": \"system\", \"content\": PROMPT},\n",
    "            {\"role\": \"user\", \"content\": json.dumps(data['precursor'])},\n",
    "            {\"role\": \"assistant\", \"content\": json.dumps(process.to_dict())},\n",
    "        ]\n",
    "    }\n",
    "    jsonl.append(json_file)\n",
    "    year.append(data['year'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 56,
   "metadata": {},
   "outputs": [],
   "source": [
    "for i in range(5):\n",
    "    train, test = train_test_split(jsonl, random_state=i)\n",
    "\n",
    "    with open(f'train_{i}.jsonl', 'w') as f:\n",
    "        for db in train:\n",
    "            if \"\\\", \\\"\" in db['messages'][1]['content']:\n",
    "                f.write(json.dumps(db) + \"\\n\")\n",
    "\n",
    "    with open(f'test_{i}.jsonl', 'w') as f:\n",
    "        for db in test:\n",
    "            if \"\\\", \\\"\" in db['messages'][1]['content']:\n",
    "                f.write(json.dumps(db) + \"\\n\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Year dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(6783, 2261)"
      ]
     },
     "execution_count": 31,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(train), len(test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [],
   "source": [
    "n_count = [0, 0]\n",
    "year_list = [1998, 2001, 2004, 2007, 2010, 2013, 2016]\n",
    "for d, y in zip(jsonl, year):\n",
    "    if not y:\n",
    "        continue\n",
    "\n",
    "    for y2 in year_list:\n",
    "        if y <= y2:\n",
    "            with open(f'year_test/year_{y2}.jsonl', 'a') as f:\n",
    "                f.write(json.dumps(d) + \"\\n\")\n",
    "    # if y > 2016:\n",
    "    with open(f'year_test/year_test_{y}.jsonl', 'a') as f:\n",
    "        f.write(json.dumps(d) + \"\\n\")\n",
    "    "
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "llmminer",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.17"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
